library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble  3.1.6     ✓ dplyr   1.0.8
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   2.1.2     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks plotly::filter(), stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggpubr)
library(dplyr)
library(glmnet)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Loaded glmnet 4.1-3
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## The following object is masked from 'package:plotly':
## 
##     select
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
library(class)
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(viridis)
## Loading required package: viridisLite
library(plotly)
library(countrycode)
library(rvest)
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(broom)
library(rstatix)
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:stats':
## 
##     filter
library(rgl)
CovidWHO <- read_csv("https://covid19.who.int/WHO-COVID-19-global-data.csv")
## Rows: 190074 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): Country_code, Country, WHO_region
## dbl  (4): New_cases, Cumulative_cases, New_deaths, Cumulative_deaths
## date (1): Date_reported
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
VaccineCovid <- read_csv("https://covid19.who.int/who-data/vaccination-data.csv")
## Rows: 228 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): COUNTRY, ISO3, WHO_REGION, DATA_SOURCE, VACCINES_USED
## dbl  (7): TOTAL_VACCINATIONS, PERSONS_VACCINATED_1PLUS_DOSE, TOTAL_VACCINATI...
## date (2): DATE_UPDATED, FIRST_VACCINE_DATE
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
CovidWHO_2021_03_01 <- filter(CovidWHO, Date_reported == "2021-03-01")
CovidWHO_2022_03_01 <- filter(CovidWHO, Date_reported == "2022-03-01")
One_Year_Cum <- data.frame(CovidWHO_2021_03_01$Country, CovidWHO_2021_03_01$WHO_region, CovidWHO_2022_03_01$Cumulative_cases - CovidWHO_2021_03_01$Cumulative_cases, CovidWHO_2022_03_01$Cumulative_deaths - CovidWHO_2021_03_01$Cumulative_deaths)
names(One_Year_Cum)[1] <- 'COUNTRY'
names(One_Year_Cum)[2] <- 'WHO_REGION'
names(One_Year_Cum)[3] <- 'CaseCum'
names(One_Year_Cum)[4] <- 'DeathCum'


One_Year_Cum
##                                                      COUNTRY WHO_REGION
## 1                                                Afghanistan       EMRO
## 2                                                    Albania       EURO
## 3                                                    Algeria       AFRO
## 4                                             American Samoa       WPRO
## 5                                                    Andorra       EURO
## 6                                                     Angola       AFRO
## 7                                                   Anguilla       AMRO
## 8                                        Antigua and Barbuda       AMRO
## 9                                                  Argentina       AMRO
## 10                                                   Armenia       EURO
## 11                                                     Aruba       AMRO
## 12                                                 Australia       WPRO
## 13                                                   Austria       EURO
## 14                                                Azerbaijan       EURO
## 15                                                   Bahamas       AMRO
## 16                                                   Bahrain       EMRO
## 17                                                Bangladesh      SEARO
## 18                                                  Barbados       AMRO
## 19                                                   Belarus       EURO
## 20                                                   Belgium       EURO
## 21                                                    Belize       AMRO
## 22                                                     Benin       AFRO
## 23                                                   Bermuda       AMRO
## 24                                                    Bhutan      SEARO
## 25                          Bolivia (Plurinational State of)       AMRO
## 26                                                   Bonaire       AMRO
## 27                                    Bosnia and Herzegovina       EURO
## 28                                                  Botswana       AFRO
## 29                                                    Brazil       AMRO
## 30                                    British Virgin Islands       AMRO
## 31                                         Brunei Darussalam       WPRO
## 32                                                  Bulgaria       EURO
## 33                                              Burkina Faso       AFRO
## 34                                                   Burundi       AFRO
## 35                                                Cabo Verde       AFRO
## 36                                                  Cambodia       WPRO
## 37                                                  Cameroon       AFRO
## 38                                                    Canada       AMRO
## 39                                            Cayman Islands       AMRO
## 40                                  Central African Republic       AFRO
## 41                                                      Chad       AFRO
## 42                                                     Chile       AMRO
## 43                                                     China       WPRO
## 44                                                  Colombia       AMRO
## 45                                                   Comoros       AFRO
## 46                                                     Congo       AFRO
## 47                                              Cook Islands       WPRO
## 48                                                Costa Rica       AMRO
## 49                                             Côte d’Ivoire       AFRO
## 50                                                   Croatia       EURO
## 51                                                      Cuba       AMRO
## 52                                                   Curaçao       AMRO
## 53                                                    Cyprus       EURO
## 54                                                   Czechia       EURO
## 55                     Democratic People's Republic of Korea      SEARO
## 56                          Democratic Republic of the Congo       AFRO
## 57                                                   Denmark       EURO
## 58                                                  Djibouti       EMRO
## 59                                                  Dominica       AMRO
## 60                                        Dominican Republic       AMRO
## 61                                                   Ecuador       AMRO
## 62                                                     Egypt       EMRO
## 63                                               El Salvador       AMRO
## 64                                         Equatorial Guinea       AFRO
## 65                                                   Eritrea       AFRO
## 66                                                   Estonia       EURO
## 67                                                  Eswatini       AFRO
## 68                                                  Ethiopia       AFRO
## 69                               Falkland Islands (Malvinas)       AMRO
## 70                                             Faroe Islands       EURO
## 71                                                      Fiji       WPRO
## 72                                                   Finland       EURO
## 73                                                    France       EURO
## 74                                             French Guiana       AMRO
## 75                                          French Polynesia       WPRO
## 76                                                     Gabon       AFRO
## 77                                                    Gambia       AFRO
## 78                                                   Georgia       EURO
## 79                                                   Germany       EURO
## 80                                                     Ghana       AFRO
## 81                                                 Gibraltar       EURO
## 82                                                    Greece       EURO
## 83                                                 Greenland       EURO
## 84                                                   Grenada       AMRO
## 85                                                Guadeloupe       AMRO
## 86                                                      Guam       WPRO
## 87                                                 Guatemala       AMRO
## 88                                                  Guernsey       EURO
## 89                                                    Guinea       AFRO
## 90                                             Guinea-Bissau       AFRO
## 91                                                    Guyana       AMRO
## 92                                                     Haiti       AMRO
## 93                                                  Holy See       EURO
## 94                                                  Honduras       AMRO
## 95                                                   Hungary       EURO
## 96                                                   Iceland       EURO
## 97                                                     India      SEARO
## 98                                                 Indonesia      SEARO
## 99                                Iran (Islamic Republic of)       EMRO
## 100                                                     Iraq       EMRO
## 101                                                  Ireland       EURO
## 102                                              Isle of Man       EURO
## 103                                                   Israel       EURO
## 104                                                    Italy       EURO
## 105                                                  Jamaica       AMRO
## 106                                                    Japan       WPRO
## 107                                                   Jersey       EURO
## 108                                                   Jordan       EMRO
## 109                                               Kazakhstan       EURO
## 110                                                    Kenya       AFRO
## 111                                                 Kiribati       WPRO
## 112                                                Kosovo[1]       EURO
## 113                                                   Kuwait       EMRO
## 114                                               Kyrgyzstan       EURO
## 115                         Lao People's Democratic Republic       WPRO
## 116                                                   Latvia       EURO
## 117                                                  Lebanon       EMRO
## 118                                                  Lesotho       AFRO
## 119                                                  Liberia       AFRO
## 120                                                    Libya       EMRO
## 121                                            Liechtenstein       EURO
## 122                                                Lithuania       EURO
## 123                                               Luxembourg       EURO
## 124                                               Madagascar       AFRO
## 125                                                   Malawi       AFRO
## 126                                                 Malaysia       WPRO
## 127                                                 Maldives      SEARO
## 128                                                     Mali       AFRO
## 129                                                    Malta       EURO
## 130                                         Marshall Islands       WPRO
## 131                                               Martinique       AMRO
## 132                                               Mauritania       AFRO
## 133                                                Mauritius       AFRO
## 134                                                  Mayotte       AFRO
## 135                                                   Mexico       AMRO
## 136                         Micronesia (Federated States of)       WPRO
## 137                                                   Monaco       EURO
## 138                                                 Mongolia       WPRO
## 139                                               Montenegro       EURO
## 140                                               Montserrat       AMRO
## 141                                                  Morocco       EMRO
## 142                                               Mozambique       AFRO
## 143                                                  Myanmar      SEARO
## 144                                                  Namibia       AFRO
## 145                                                    Nauru       WPRO
## 146                                                    Nepal      SEARO
## 147                                              Netherlands       EURO
## 148                                            New Caledonia       WPRO
## 149                                              New Zealand       WPRO
## 150                                                Nicaragua       AMRO
## 151                                                    Niger       AFRO
## 152                                                  Nigeria       AFRO
## 153                                                     Niue       WPRO
## 154                                          North Macedonia       EURO
## 155           Northern Mariana Islands (Commonwealth of the)       WPRO
## 156                                                   Norway       EURO
## 157 occupied Palestinian territory, including east Jerusalem       EMRO
## 158                                                     Oman       EMRO
## 159                                                    Other      Other
## 160                                                 Pakistan       EMRO
## 161                                                    Palau       WPRO
## 162                                                   Panama       AMRO
## 163                                         Papua New Guinea       WPRO
## 164                                                 Paraguay       AMRO
## 165                                                     Peru       AMRO
## 166                                              Philippines       WPRO
## 167                                         Pitcairn Islands       WPRO
## 168                                                   Poland       EURO
## 169                                                 Portugal       EURO
## 170                                              Puerto Rico       AMRO
## 171                                                    Qatar       EMRO
## 172                                        Republic of Korea       WPRO
## 173                                      Republic of Moldova       EURO
## 174                                                  Réunion       AFRO
## 175                                                  Romania       EURO
## 176                                       Russian Federation       EURO
## 177                                                   Rwanda       AFRO
## 178                                                     Saba       AMRO
## 179                                         Saint Barthélemy       AMRO
## 180                                             Saint Helena       AFRO
## 181                                    Saint Kitts and Nevis       AMRO
## 182                                              Saint Lucia       AMRO
## 183                                             Saint Martin       AMRO
## 184                                Saint Pierre and Miquelon       AMRO
## 185                         Saint Vincent and the Grenadines       AMRO
## 186                                                    Samoa       WPRO
## 187                                               San Marino       EURO
## 188                                    Sao Tome and Principe       AFRO
## 189                                             Saudi Arabia       EMRO
## 190                                                  Senegal       AFRO
## 191                                                   Serbia       EURO
## 192                                               Seychelles       AFRO
## 193                                             Sierra Leone       AFRO
## 194                                                Singapore       WPRO
## 195                                           Sint Eustatius       AMRO
## 196                                             Sint Maarten       AMRO
## 197                                                 Slovakia       EURO
## 198                                                 Slovenia       EURO
## 199                                          Solomon Islands       WPRO
## 200                                                  Somalia       EMRO
## 201                                             South Africa       AFRO
## 202                                              South Sudan       AFRO
## 203                                                    Spain       EURO
## 204                                                Sri Lanka      SEARO
## 205                                                    Sudan       EMRO
## 206                                                 Suriname       AMRO
## 207                                                   Sweden       EURO
## 208                                              Switzerland       EURO
## 209                                     Syrian Arab Republic       EMRO
## 210                                               Tajikistan       EURO
## 211                                                 Thailand      SEARO
## 212                                       The United Kingdom       EURO
## 213                                              Timor-Leste      SEARO
## 214                                                     Togo       AFRO
## 215                                                  Tokelau       WPRO
## 216                                                    Tonga       WPRO
## 217                                      Trinidad and Tobago       AMRO
## 218                                                  Tunisia       EMRO
## 219                                                   Turkey       EURO
## 220                                             Turkmenistan       EURO
## 221                                 Turks and Caicos Islands       AMRO
## 222                                                   Tuvalu       WPRO
## 223                                                   Uganda       AFRO
## 224                                                  Ukraine       EURO
## 225                                     United Arab Emirates       EMRO
## 226                              United Republic of Tanzania       AFRO
## 227                                 United States of America       AMRO
## 228                             United States Virgin Islands       AMRO
## 229                                                  Uruguay       AMRO
## 230                                               Uzbekistan       EURO
## 231                                                  Vanuatu       WPRO
## 232                       Venezuela (Bolivarian Republic of)       AMRO
## 233                                                 Viet Nam       WPRO
## 234                                        Wallis and Futuna       WPRO
## 235                                                    Yemen       EMRO
## 236                                                   Zambia       AFRO
## 237                                                 Zimbabwe       AFRO
##      CaseCum DeathCum
## 1     117926     5154
## 2     164396     1673
## 3     151844     3852
## 4         84        0
## 5      27133       41
## 6      77934     1392
## 7       2510        9
## 8       6711      121
## 9    6793291    74187
## 10    247940     5283
## 11     25880      140
## 12   2812847     4262
## 13   2243646     5653
## 14    551403     6199
## 15     24584      591
## 16    392488     1005
## 17   1397575    20629
## 18     52127      283
## 19    632430     4504
## 20   2797457     7815
## 21     44304      336
## 22     20941       93
## 23     10808      111
## 24     12270        5
## 25    644965     9813
## 26      6517       24
## 27    239064    10363
## 28    235580     2309
## 29  18250872   394913
## 30      5932       61
## 31     63356       73
## 32    844241    25390
## 33      8769      233
## 34     35917       12
## 35     40563      254
## 36    129456     3032
## 37     83526     1372
## 38   2419341    14577
## 39     18935       15
## 40      9323       50
## 41      3284       50
## 42   2235394    21781
## 43    233956     1389
## 44   3814566    79033
## 45      4464       16
## 46     15200      250
## 47        25        0
## 48    600958     5228
## 49     48729      601
## 50    811821     9543
## 51   1020083     8172
## 52     34225      239
## 53    286036      623
## 54   2345278    17735
## 55         0        0
## 56     60242      628
## 57   2553335     2212
## 58      9481      126
## 59     10966       57
## 60    334939     1268
## 61    546252    19433
## 62    301347    13386
## 63     87920     2217
## 64      9879       91
## 65      6854       96
## 66    432451     1650
## 67     52146      738
## 68    309655     5097
## 69        59        0
## 70     33722       27
## 71     63871      832
## 72    632312     1913
## 73  18416911    49302
## 74     61025      306
## 75     49264      502
## 76     32978      220
## 77      7227      215
## 78   1341401    12679
## 79  12420150    52832
## 80     76005      835
## 81     11198        8
## 82   2225730    19280
## 83     11717       18
## 84     13542      215
## 85    114560      754
## 86     29773      191
## 87    602449    10578
## 88     12682       14
## 89     20405      351
## 90      4760      119
## 91     54353     1025
## 92     17905      570
## 93         0        0
## 94    242979     6634
## 95   1356656    28993
## 96    125217       33
## 97  31818804   356866
## 98   4247862   112335
## 99   5420260    76765
## 100  1607304    11583
## 101  1080706     1895
## 102    22331       45
## 103  2859921     4479
## 104  9858659    57068
## 105   104723     2391
## 106  4573119    15746
## 107    34292       43
## 108  1240618     9134
## 109  1127945    15558
## 110   216973     3783
## 111     2914       11
## 112   156992     1515
## 113   429042     1455
## 114   114276     1492
## 115   142698      621
## 116   566934     3606
## 117   693677     5399
## 118    22216      405
## 119     5374      209
## 120   361108     4082
## 121     9543       20
## 122   708041     5180
## 123   127823      355
## 124    43828     1069
## 125    53394     1575
## 126  3141984    31619
## 127   150676      235
## 128    22005      369
## 129    48661      290
## 130        0        0
## 131   107722      849
## 132    41446      540
## 133   169177      894
## 134    19062       77
## 135  3414819   108368
## 136        0        0
## 137     7456       27
## 138   905205     2094
## 139   154363     1677
## 140      143        1
## 141   677321     7365
## 142   165697     1551
## 143   448321    16173
## 144   118405     3585
## 145        0        0
## 146   702860     8928
## 147  5273722     5996
## 148    54255      299
## 149   118138       30
## 150     8816       51
## 151     4014      135
## 152    98903     1235
## 153        0        0
## 154   194475     5901
## 155     9407       28
## 156  1192645     1039
## 157   438854     3254
## 158   240748     2674
## 159       19        0
## 160   929387    17318
## 161     3781        6
## 162   414817     2248
## 163    39986      625
## 164   482784    15192
## 165  2192401    88598
## 166  3085648    44133
## 167        0        0
## 168  3965476    67724
## 169  2458056     4746
## 170   373200     2074
## 171   193318      412
## 172  3183425     6565
## 173   316933     7020
## 174   289746      614
## 175  1931490    43064
## 176 12237719   265991
## 177   110652     1196
## 178      202        0
## 179     3099        4
## 180        0        0
## 181     5489       42
## 182    19332      323
## 183     8288       32
## 184      971        1
## 185     6755       98
## 186       32        0
## 187    10661       38
## 188     4148       43
## 189   367644     2504
## 190    51173     1088
## 191  1451716    10798
## 192    36759      152
## 193     3778       46
## 194   664488      990
## 195      438        3
## 196     7497       58
## 197  1149204    11260
## 198   702651     2967
## 199     7031       99
## 200    19056     1109
## 201  2160649    49419
## 202     9052       44
## 203  7890220    25786
## 204   563396    15768
## 205    31001     2018
## 206    69204     1146
## 207  1781971     4557
## 208  2261799     3088
## 209    38987     2048
## 210     4072       34
## 211  2886316    22893
## 212 14723954    37647
## 213    22603      128
## 214    29905      188
## 215        0        0
## 216      355        0
## 217   120062     3489
## 218   764953    19783
## 219 11386977    65876
## 220        0        0
## 221     3753       22
## 222        0        0
## 223   123011     3254
## 224  3490009    80079
## 225   488449     1080
## 226    33111      777
## 227 49824729   421468
## 228    12749       84
## 229   783149     6374
## 230   156575     1014
## 231       17        0
## 232   376056     4295
## 233  3441037    40217
## 234      444        7
## 235     9482     1500
## 236   234216     2861
## 237   200291     3932
VaccineCovid
## # A tibble: 228 × 14
##    COUNTRY            ISO3  WHO_REGION DATA_SOURCE DATE_UPDATED TOTAL_VACCINATI…
##    <chr>              <chr> <chr>      <chr>       <date>                  <dbl>
##  1 Afghanistan        AFG   EMRO       REPORTING   2022-03-06            5597130
##  2 Albania            ALB   EURO       REPORTING   2022-02-20            2707658
##  3 Algeria            DZA   AFRO       REPORTING   2022-03-09           13704895
##  4 American Samoa     ASM   WPRO       REPORTING   2022-02-16              85050
##  5 Andorra            AND   EURO       REPORTING   2022-02-13             142420
##  6 Angola             AGO   AFRO       REPORTING   2022-03-08           16850195
##  7 Anguilla           AIA   AMRO       REPORTING   2022-02-25              22165
##  8 Antigua and Barbu… ATG   AMRO       REPORTING   2022-02-25             124726
##  9 Argentina          ARG   AMRO       REPORTING   2022-02-25           93008081
## 10 Armenia            ARM   EURO       REPORTING   2022-02-13            1971565
## # … with 218 more rows, and 8 more variables:
## #   PERSONS_VACCINATED_1PLUS_DOSE <dbl>, TOTAL_VACCINATIONS_PER100 <dbl>,
## #   PERSONS_VACCINATED_1PLUS_DOSE_PER100 <dbl>, PERSONS_FULLY_VACCINATED <dbl>,
## #   PERSONS_FULLY_VACCINATED_PER100 <dbl>, VACCINES_USED <chr>,
## #   FIRST_VACCINE_DATE <date>, NUMBER_VACCINES_TYPES_USED <dbl>
NewCovid <- as.data.frame(inner_join(One_Year_Cum, VaccineCovid))
## Joining, by = c("COUNTRY", "WHO_REGION")
library(readxl)
url <- "https://hdr.undp.org/sites/default/files/2020_statistical_annex_table_3.xlsx"
destfile <- "X2020_statistical_annex_table_3.xlsx"
curl::curl_download(url, destfile)
X2020_statistical_annex_table_3 <- read_excel(destfile, 
    col_types = c("skip", "text", "text", 
        "numeric", "text", "numeric", "text", 
        "numeric", "text", "text", "text", 
        "numeric", "text", "text", "text", 
        "numeric", "text", "text", "text", 
        "numeric", "text", "text", "text", 
        "numeric", "text", "text", "text", 
        "text", "text", "text", "text", "text"), 
    skip = 2)
## New names:
## * `` -> ...1
## * `` -> ...3
## * `` -> ...5
## * `` -> ...6
## * `` -> ...7
## * ...
X2020_statistical_annex_table_3 <- as.data.frame(X2020_statistical_annex_table_3[-1:-3,-5:-31])
X2020_statistical_annex_table_3 <- as.data.frame(X2020_statistical_annex_table_3[,-3])
X2020_statistical_annex_table_3 <- as.data.frame(X2020_statistical_annex_table_3[-193:-261,])


X2020_statistical_annex_table_3[,1] <- countrycode(X2020_statistical_annex_table_3[,1], origin = 'country.name', destination = 'iso3c')
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: High human development, Low human development, Medium human development
NewCovid[,1] <- countrycode(NewCovid[,1], origin = 'country.name', destination = 'iso3c')
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: Bonaire, Saba, Sint Eustatius
colnames(X2020_statistical_annex_table_3) <- c("COUNTRY","HDI","IneqHDI")

WikiPop <- read_html("https://en.wikipedia.org/wiki/List_of_countries_by_population_(United_Nations)")
PopTable = html_node(WikiPop, ".wikitable")
PopTable = html_table(PopTable, fill = TRUE) 
PopTable <- as.data.frame(PopTable)
PopTable[,1] <- countrycode(PopTable[,1], origin = 'country.name', destination = 'iso3c')
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: American Samoa (United States), Anguilla (United Kingdom), Aruba (Netherlands), Bermuda (United Kingdom), British Virgin Islands (United Kingdom), Cayman Islands (United Kingdom), Cook Islands (New Zealand), Curaçao (Netherlands), Falkland Islands (United Kingdom), Faroe Islands (Denmark), French Guiana (France), French Polynesia (France), Gibraltar (United Kingdom), Greenland (Denmark), Guadeloupe (France)[v], Guam (United States), Isle of Man (United Kingdom), Mayotte (France), Micronesia, Montserrat (United Kingdom), New Caledonia (France), Niue (New Zealand), Northern Mariana Islands (United States), Puerto Rico (United States), Réunion (France), Saint Helena (United Kingdom)[y], Saint Pierre and Miquelon (France), Sint Maarten (Netherlands), Tokelau (New Zealand), Turks and Caicos Islands (United Kingdom), U.S. Virgin Islands (United States), Wallis and Futuna (France), World
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some strings were matched more than once, and therefore set to <NA> in the result: American Samoa (United States),ASM,USA; Anguilla (United Kingdom),AIA,GBR; Aruba (Netherlands),ABW,NLD; Bermuda (United Kingdom),BMU,GBR; British Virgin Islands (United Kingdom),VGB,GBR; Cayman Islands (United Kingdom),CYM,GBR; Cook Islands (New Zealand),COK,NZL; Curaçao (Netherlands),CUW,NLD; Falkland Islands (United Kingdom),FLK,GBR; Faroe Islands (Denmark),DNK,FRO; French Guiana (France),FRA,GUF; French Polynesia (France),FRA,PYF; Gibraltar (United Kingdom),GIB,GBR; Greenland (Denmark),DNK,GRL; Guadeloupe (France)[v],FRA,GLP; Guam (United States),GUM,USA; Isle of Man (United Kingdom),IMN,GBR; Mayotte (France),FRA,MYT; Montserrat (United Kingdom),MSR,GBR; New Caledonia (France),FRA,NCL; Niue (New Zealand),NZL,NIU; Northern Mariana Islands (United States),MNP,USA; Puerto Rico (United States),PRI,USA; Réunion (France),FRA,REU; Saint Helena (United Kingdom)[y],SHN,GBR; Saint Pierre and Miquelon (France),FRA,SPM; Sint Maarten (Netherlands),NLD,SXM; Tokelau (New Zealand),NZL,TKL; Turks and Caicos Islands (United Kingdom),TCA,GBR; U.S. Virgin Islands (United States),VIR,USA; Wallis and Futuna (France),FRA,WLF
names(PopTable)[1] <- 'COUNTRY'
 
NewCovid <-inner_join(X2020_statistical_annex_table_3, NewCovid)
## Joining, by = "COUNTRY"
NewCovid <- na.omit(inner_join(PopTable, NewCovid))
## Joining, by = "COUNTRY"
NewCovid[,1] <- countrycode(NewCovid[,1], origin = 'iso3c', destination = 'country.name')
NewCovid$HDI <- as.numeric(NewCovid$HDI)
NewCovid$IneqHDI <- as.numeric(NewCovid$IneqHDI)
## Warning: NAs introduced by coercion
NewCovid <- NewCovid[,-2:-4]
NewCovid <- NewCovid[,-3]
names(NewCovid)[2] <- 'Population'
NewCovid$Population <- as.numeric(gsub(",","",NewCovid$Population))

The data set(s) are from the World Health Organization’s Database,Wikipedia, and the United Nations Development Programme.

CovidWHO is the covid global data, and has the Date Reported, Country Code, Name of Country, WHO Region, New cases, Cumulative cases,New deaths, and Cumulative deaths.

One_Year_Cum has Country Name, WHO Region, Cumulative Cases between March 1st 2021 and 2022, and Cumulative Deaths between March 1st 2021 and 2022

VaccineCovid has Country, ISO3 code for each country, WHO Region, Source of Data, Date Updated, Total vaccinations, Persons vaccinated with one or more dose, Persons Total vaccinated out of 100, Persons vaccinated with one or more dose out of 100, Persons fully vaccinated, Persons fully vaccinated out of 100, Vaccine types used, First vaccination Date, and types of vaccination used.

X2020_statistical_annex_table_3 has Country name, HDI and HDI adjusted for Inequality for 2020.

PopTable has Country (IS03), UN Continental Region, UN Statistical Region, Population for each country as of July 1st 2018, Population for each country as of July 1st 2019, and Percent change of population between those two years.

NormalizedCases <- NewCovid$CaseCum/NewCovid$Population*100000
NormalizedDeaths <- NewCovid$DeathCum/NewCovid$Population*100000
CaseMortalityRate <- (NormalizedDeaths/NormalizedCases)*100

NewCovid2<- na.omit(data.frame(NewCovid$COUNTRY, NewCovid$WHO_REGION, NormalizedCases, NormalizedDeaths, CaseMortalityRate, NewCovid$HDI,NewCovid$PERSONS_FULLY_VACCINATED_PER100, NewCovid$Population))
names(NewCovid2)[1] <- 'COUNTRY'
names(NewCovid2)[2] <- 'WHO_REGION'
names(NewCovid2)[3] <- 'NormCase'
names(NewCovid2)[4] <- 'NormDeath'
names(NewCovid2)[5] <- 'CaseMortalityRate'
names(NewCovid2)[6] <- 'HDI'
names(NewCovid2)[7] <- 'FullyVac100'
names(NewCovid2)[8] <- 'Pop'
View(NewCovid2)

To create an organized dataset for the purpose of modelling, I coerced the datasets I described into NewCovid2, with the variables Country, WHO Region, Normalized Cases (Cumulative Cases per 100000 people) between March 1st 2021 and March 1st 2022, Normalized Deaths (Cumulative Deaths per 100000 people) between March 1st 2021 and March 1st 2022,

The Corona virus Pandemic has been going on since March 2020, and has led to numerous lock downs worldwide and safety precautions being put in place for the last two years, and has eased due to Covid-19 vaccinations being rolled out,distancing measures, and mask mandates.

The vaccine roll out in most countries was a year after the pandemic has started, namely around Early March. This report will attempt to give answers to questions such as: how much variance does the HDI and Persons fully vaccinated (out of 100) explain for case mortality? What model captures this relationship best, and is there a difference of case mortality between each WHO Region?

EXPLORATION OF DATASET

To explore the dataset, I will generate summary statistics for the Variables in NewCovid2 globally as below:

summary(NewCovid2)
##    COUNTRY           WHO_REGION           NormCase         NormDeath        
##  Length:178         Length:178         Min.   :      6   Min.   :    0.000  
##  Class :character   Class :character   1st Qu.:    591   1st Qu.:    8.733  
##  Mode  :character   Mode  :character   Median :   5408   Median :   38.888  
##                                        Mean   :  56697   Mean   :  188.448  
##                                        3rd Qu.:  12234   3rd Qu.:   93.262  
##                                        Max.   :8547567   Max.   :21854.881  
##  CaseMortalityRate      HDI          FullyVac100         Pop           
##  Min.   : 0.0000   Min.   :0.3940   Min.   : 0.08   Min.   :1.801e+04  
##  1st Qu.: 0.4374   1st Qu.:0.6012   1st Qu.:22.67   1st Qu.:2.137e+06  
##  Median : 1.0617   Median :0.7410   Median :50.36   Median :9.138e+06  
##  Mean   : 1.3786   Mean   :0.7229   Mean   :47.54   Mean   :4.135e+07  
##  3rd Qu.: 1.8180   3rd Qu.:0.8433   3rd Qu.:71.01   3rd Qu.:2.908e+07  
##  Max.   :15.8194   Max.   :0.9570   Max.   :97.25   Max.   :1.434e+09

Overall globally, there are 178 different countries in my Data Set.

For the variable Normalized Cases, the minimum was 6, 1st Quartile was 590 cases, Mean was 56697 cases, 3rd Quartile was 12234 and Maximum was 8547567 (all out of 100000 persons).

For the variable Normalized Deaths, the minimum was 0, 1st Quartile was 8.733 deaths, Mean was 188.448 deaths, 3rd Quartile was 93.262 and Maximum was 21854.881 (all out of 100000 persons).

For Case Mortality Rate, the minimum was 0%, 1st Quartile was 0.4374%, Mean was 1.3786%, 3rd Quartile was 1.8180%, and maximum was 15.8194%.

For HDI, the minimum was 0.3940, the 1st Quartile was 0.6012, the Mean was 0.7229, 3rd Quartile was 0.8433, and maximum was 0.9570.

For Full Vaccinations, the minimum was .08, 1st Quartile was 22.67, Mean was 47.54, 3rd Quartile was 71.01, and maximum was 97.25 (all out of 100 persons).

MODEL EXPLORATION: CASE MORTALITY RATE versus HDI and PERSONS FULLY VACCINATED (out of 100)

To begin, I will fit a linear model of case mortality rate against HDI and persons fully vaccinated (out of 100). The assumptions for this model that must be satisfied are normality, homoscedasticity (if there is equal variance in the standardized residuals), if the errors are independent (no Autocorrelation), and if my independent variables (or predictors) have significant multicollinearity.

check_outlier <- function(v, coef=1.5){
  quantiles <- quantile(v,probs=c(0.25,0.75))
  IQR <- quantiles[2]-quantiles[1]
  res <- v < (quantiles[1]-coef*IQR)|v > (quantiles[2]+coef*IQR)
  return(res)
}
LModel <- lm(formula = NewCovid2$CaseMortalityRate ~ NewCovid2$HDI*NewCovid2$FullyVac100)
summary(LModel)
## 
## Call:
## lm(formula = NewCovid2$CaseMortalityRate ~ NewCovid2$HDI * NewCovid2$FullyVac100)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5243 -0.6380 -0.2213  0.2587 13.3146 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                          2.96236    1.01886   2.908  0.00412 **
## NewCovid2$HDI                       -0.93183    1.72283  -0.541  0.58929   
## NewCovid2$FullyVac100               -0.00937    0.02192  -0.427  0.66959   
## NewCovid2$HDI:NewCovid2$FullyVac100 -0.01237    0.02967  -0.417  0.67714   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.41 on 174 degrees of freedom
## Multiple R-squared:  0.1887, Adjusted R-squared:  0.1748 
## F-statistic: 13.49 on 3 and 174 DF,  p-value: 5.876e-08
ggplot() + aes(x = "", y = NewCovid2$CaseMortalityRate) +
  geom_boxplot(fill = "#0c4c8a") +
  theme_minimal()

ggplot() + aes(x = "", y = NewCovid2$HDI) +
  geom_boxplot(fill = "#0c4c8a") +
  theme_minimal()

ggplot() + aes(x = "", y = NewCovid2$FullyVac100) +
  geom_boxplot(fill = "#0c4c8a") +
  theme_minimal()

shapiro.test(studres(LModel))
## 
##  Shapiro-Wilk normality test
## 
## data:  studres(LModel)
## W = 0.48376, p-value < 2.2e-16

Looking at the box plot of Case Mortality Rate, there seems to be outliers in the data, which may communicate that the data will not satisfy Normality. This is supported up by the Shapiro Wilk Test, which has a p value of \[p = 1.358 \times 10^{-10} < .05\]

\[H_0: \text{Resiudals Normally Distributed } H_a: \text{Residuals not Normally Distributed}\] Hence, we reject the Null Hypothesis, that the residuals are normally distributed (Normality) in favor of the Alternative Hypothesis, that the residuals are not normally distributed. To fix this a boxcox transformation would probably be best, as well as checking if there still are outliers and potentially removing them as necessary. These two measures will likely allow the normality assumption to be satisfied, and allow us to continue with model diagnostics.

NewCovid2[NewCovid2 == 0] <- NA
NewCovid2 <- na.omit((NewCovid2)) #remove variables with 0 as to allow for boxcox
bc <- boxcox(NewCovid2$CaseMortalityRate ~ NewCovid2$HDI*NewCovid2$FullyVac100)

(lambda <- bc$x[which.max(bc$y)])
## [1] 0.1414141
new_model <- lm(((NewCovid2$CaseMortalityRate^lambda-1)/lambda) ~ NewCovid2$HDI*NewCovid2$FullyVac100)

data <- ((NewCovid2$CaseMortalityRate^lambda-1)/lambda)
outlier <- check_outlier(data)
label <- ifelse(outlier,NewCovid2$COUNTRY,"")



ggplot() + aes(x = "", y = ((NewCovid2$CaseMortalityRate^lambda-1)/lambda)) +
  geom_boxplot(fill = "#0c4c8a") +
  theme_minimal()+geom_text(aes(label=label),hjust=-1)

plot(new_model, 1, id.n = 5)

plot(new_model, 2, id.n = 5)

plot(new_model, 3, id.n = 5)

plot(new_model, 4, id.n = 5)

plot(new_model, 5, id.n = 5)

CorMat <- data.frame(((NewCovid2$CaseMortalityRate^lambda-1)/lambda),NewCovid2$HDI,NewCovid2$FullyVac100)
CorMat <- cor(CorMat)
CorMat
##                                                    X..NewCovid2.CaseMortalityRate.lambda...1..lambda.
## X..NewCovid2.CaseMortalityRate.lambda...1..lambda.                                          1.0000000
## NewCovid2.HDI                                                                              -0.5085029
## NewCovid2.FullyVac100                                                                      -0.5552074
##                                                    NewCovid2.HDI
## X..NewCovid2.CaseMortalityRate.lambda...1..lambda.    -0.5085029
## NewCovid2.HDI                                          1.0000000
## NewCovid2.FullyVac100                                  0.7973061
##                                                    NewCovid2.FullyVac100
## X..NewCovid2.CaseMortalityRate.lambda...1..lambda.            -0.5552074
## NewCovid2.HDI                                                  0.7973061
## NewCovid2.FullyVac100                                          1.0000000
findCorrelation(abs(CorMat), cutoff = 0.8, names = TRUE)
## character(0)
summary(new_model)
## 
## Call:
## lm(formula = ((NewCovid2$CaseMortalityRate^lambda - 1)/lambda) ~ 
##     NewCovid2$HDI * NewCovid2$FullyVac100)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -3.00184 -0.42056 -0.00359  0.47031  2.97403 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         -0.80208    0.56901  -1.410 0.160472    
## NewCovid2$HDI                        2.55112    0.96216   2.651 0.008768 ** 
## NewCovid2$FullyVac100                0.04703    0.01234   3.812 0.000192 ***
## NewCovid2$HDI:NewCovid2$FullyVac100 -0.08858    0.01664  -5.322 3.19e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7871 on 171 degrees of freedom
## Multiple R-squared:  0.4168, Adjusted R-squared:  0.4065 
## F-statistic: 40.73 on 3 and 171 DF,  p-value: < 2.2e-16
shapiro.test(studres(new_model))
## 
##  Shapiro-Wilk normality test
## 
## data:  studres(new_model)
## W = 0.97138, p-value = 0.001136
model.diag.metrics <- augment(new_model)
model.diag.metrics %>%
  top_n(5, wt = .cooksd)
## # A tibble: 5 × 9
##   `((NewCovid2$C…` `NewCovid2$HDI` `NewCovid2$Ful…` .fitted .resid   .hat .sigma
##              <dbl>           <dbl>            <dbl>   <dbl>  <dbl>  <dbl>  <dbl>
## 1             2.14           0.51              5.32  0.509    1.64 0.0275  0.779
## 2             3.38           0.47              1.29  0.404    2.97 0.0429  0.754
## 3            -2.70           0.433             0.08  0.303   -3.00 0.0588  0.753
## 4            -2.57           0.654            74.5   0.0545  -2.63 0.0479  0.762
## 5            -2.84           0.949            80.9  -1.38    -1.47 0.0343  0.781
## # … with 2 more variables: .cooksd <dbl>, .std.resid <dbl>
model.diag.metrics %>%
  top_n(5, wt = .std.resid)
## # A tibble: 5 × 9
##   `((NewCovid2$CaseMor…` `NewCovid2$HDI` `NewCovid2$Ful…` .fitted .resid    .hat
##                    <dbl>           <dbl>            <dbl>   <dbl>  <dbl>   <dbl>
## 1                   1.25           0.779            61.1   -0.157   1.41 0.00898
## 2                   2.14           0.51              5.32   0.509   1.64 0.0275 
## 3                   1.54           0.777            73.1   -0.414   1.96 0.0143 
## 4                   3.38           0.47              1.29   0.404   2.97 0.0429 
## 5                   1.39           0.759            76.7   -0.415   1.81 0.0200 
## # … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>

It seems that Normality is not satisfied yet, so the next course of action would be to remove outliers based off their cook’s distances. Based off the cooks distance plot, the data points that have the highest cook distances are points 45, 77, and 151 which correspond to Yemen (with a case mortality rate of \(\approx 15.9\%\)), Burundi (with a case mortality rate of \(\approx 0.0334\%\)) and Guyana (with a case mortality rate of \(\approx 1.885\%\)) With regard to Yemen, it has the highest case mortality rate globally, and is an extreme outlier, not following the overall trend. With regard to Burundi, it has a very low case mortality rate compared to it’s HDI (0.433) and persons fully vaccinated (0.080 out of 100), and Guyana has a fairly low case mortality rate compared to it’s HDI (0.682) and persons fully vaccinated (44.467 out of 100). Hence, these data points do not follow the general trend associated with the data, and there will likely be a marked improvement in the normality of the data once these points are removed.

NewCovid2 <- NewCovid2[-c(45,77,151), ]
bc <- boxcox(NewCovid2$CaseMortalityRate ~ NewCovid2$HDI*NewCovid2$FullyVac100)

(lambda <- bc$x[which.max(bc$y)])
## [1] 0.1414141
new_model <- lm(((NewCovid2$CaseMortalityRate^lambda-1)/lambda) ~ NewCovid2$HDI*NewCovid2$FullyVac100)
confint(new_model)
##                                           2.5 %      97.5 %
## (Intercept)                         -1.72185026  0.33293762
## NewCovid2$HDI                        0.56501098  3.99811491
## NewCovid2$FullyVac100                0.02977906  0.07448294
## NewCovid2$HDI:NewCovid2$FullyVac100 -0.12220154 -0.06234453
CorMat <- data.frame(((NewCovid2$CaseMortalityRate^lambda-1)/lambda),NewCovid2$HDI,NewCovid2$FullyVac100)
CorMat <- cor(CorMat)
CorMat
##                                                    X..NewCovid2.CaseMortalityRate.lambda...1..lambda.
## X..NewCovid2.CaseMortalityRate.lambda...1..lambda.                                          1.0000000
## NewCovid2.HDI                                                                              -0.5621095
## NewCovid2.FullyVac100                                                                      -0.5876201
##                                                    NewCovid2.HDI
## X..NewCovid2.CaseMortalityRate.lambda...1..lambda.    -0.5621095
## NewCovid2.HDI                                          1.0000000
## NewCovid2.FullyVac100                                  0.7954318
##                                                    NewCovid2.FullyVac100
## X..NewCovid2.CaseMortalityRate.lambda...1..lambda.            -0.5876201
## NewCovid2.HDI                                                  0.7954318
## NewCovid2.FullyVac100                                          1.0000000
findCorrelation(abs(CorMat), cutoff = 0.9, names = TRUE)
## character(0)
plot(new_model, 1, id.n = 5)

plot(new_model, 2, id.n = 5)

plot(new_model, 3, id.n = 5)

plot(new_model, 4, id.n = 5)

plot(new_model, 5, id.n = 5)

hist(rstandard(new_model))

summary(new_model)
## 
## Call:
## lm(formula = ((NewCovid2$CaseMortalityRate^lambda - 1)/lambda) ~ 
##     NewCovid2$HDI * NewCovid2$FullyVac100)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.55965 -0.41834 -0.02625  0.44482  1.89653 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         -0.69446    0.52041  -1.334  0.18387    
## NewCovid2$HDI                        2.28156    0.86950   2.624  0.00949 ** 
## NewCovid2$FullyVac100                0.05213    0.01132   4.604 8.13e-06 ***
## NewCovid2$HDI:NewCovid2$FullyVac100 -0.09227    0.01516  -6.087 7.58e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6936 on 168 degrees of freedom
## Multiple R-squared:  0.4836, Adjusted R-squared:  0.4744 
## F-statistic: 52.44 on 3 and 168 DF,  p-value: < 2.2e-16
shapiro.test(studres(new_model))
## 
##  Shapiro-Wilk normality test
## 
## data:  studres(new_model)
## W = 0.99487, p-value = 0.818
bptest(new_model)
## 
##  studentized Breusch-Pagan test
## 
## data:  new_model
## BP = 4.7717, df = 3, p-value = 0.1893
durbinWatsonTest(new_model)
##  lag Autocorrelation D-W Statistic p-value
##    1      0.06751893       1.86198     0.3
##  Alternative hypothesis: rho != 0
model.diag.metrics <- augment(new_model)
model.diag.metrics %>%
  top_n(5, wt = .cooksd)
## # A tibble: 5 × 9
##   `((NewCovid2$C…` `NewCovid2$HDI` `NewCovid2$Ful…` .fitted .resid   .hat .sigma
##              <dbl>           <dbl>            <dbl>   <dbl>  <dbl>  <dbl>  <dbl>
## 1            2.14            0.51              5.32   0.496   1.65 0.0305  0.683
## 2            1.32            0.394             6.37   0.305   1.02 0.0688  0.691
## 3           -0.686           0.433             3.86   0.340  -1.03 0.0565  0.691
## 4           -2.86            0.931            82.1   -1.34   -1.52 0.0310  0.685
## 5           -2.84            0.949            80.9   -1.40   -1.45 0.0347  0.686
## # … with 2 more variables: .cooksd <dbl>, .std.resid <dbl>
model.diag.metrics %>%
  top_n(5, wt = .std.resid)
## # A tibble: 5 × 9
##   `((NewCovid2$CaseMor…` `NewCovid2$HDI` `NewCovid2$Ful…` .fitted .resid    .hat
##                    <dbl>           <dbl>            <dbl>   <dbl>  <dbl>   <dbl>
## 1                   1.25           0.779            61.1   -0.123   1.38 0.00926
## 2                   2.14           0.51              5.32   0.496   1.65 0.0305 
## 3                   1.54           0.777            73.1   -0.353   1.90 0.0148 
## 4                   1.11           0.782            66.2   -0.236   1.34 0.0104 
## 5                   1.39           0.759            76.7   -0.336   1.73 0.0209 
## # … with 3 more variables: .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>

To reiterate, the null and alternative hypothesis for normality are as follows, \[H_0: \text{Resiudals Normally Distributed } H_a: \text{Residuals not Normally Distributed}\]

Since the Shapiro-Wilk normality test has has a p-value of \(p = 0.818 > .05\), we can accept the null hypothesis, and normality is now satisfied. This is supported by the Q-Q plot, as the residuals approximately follow a normal distribution. With Normality satisfied, the other assumptions can now be checked.

Now, we must check homoscedasticity. The null and alternative hypothesis for homoscedasticity are as follows,\[H_0: \text{constant variances in the residuals errors (homoscedasticity) } H_a: \text{non-constant variances in the residuals errors}\]

Since the studentized Breusch-Pagan test has a p-value of \(p = 0.1893 > .05\), we accept the null hypothesis, and homoscedasticity is satisfied. This is supported by the fact that the scale-location plot has a horizontal line with equally spread points. This is supported because in our Scale-Location plot, we have a horizontal line with equally spread points.

Now, we must check for independence, or autocorrelation. The null and alternative hypothesis for autocorrelation are as follows,\[H_0: \text{errors are independent (no autocorrelation) } H_a: \text{errors are not independent (autocorrelation)}\]

Since the Durbin-Watson test has a p-value of \(p = 0.372 > .05\), we accept the null hypothesis, and independence is satisfied

Finally, I believe there is not significant correlation, or Multicollinearity in our model. Although the persons fully vaccinated (out of 100) and HDI have a correlation of 0.7954318, these two variables give different information with regard to the dependent variable, case mortality rate. For example, although how developed a nation is (HDI) is correlated with how many people are fully vaccinated (out of 100), due to people in more developed nations having better access to vaccines to begin with, the HDI also accounts for other factors such as life expectancy, years of education, and per capita income, which are related to mortality rate as countries without a high life expectancy will probably be hit harder by covid (individuals more malnourished, etc). Countries with high per capita income probably also have better access to medical devices, medicines, and overall a more robust healthcare system, all which decrease case mortality rate, something not accounted for in persons fully vaccinated (out of 100) Hence, I believe these two variables communicate some degree of differing information. Adding to this, none of the values in my correlation matrix have a correlation over .9, so they carry differing information. Both of my predictor variables have moderate correlation with the output (case mortality rate), so they should both be kept in the model.

Now, I will check if my coefficients are significant (non-zero). My \(\alpha = \frac{.05}{4} = .0125\), as I need to use the Bonferroni correction and divide by the number of hypothesis tests, and my confidence interval is 95%

Since for \(\beta_0\) \(p = 0.1838 > .0125 = \alpha\) and my null and alternative hypothesis are as follows \[H_0: \beta_0 \neq 0, H_a: \beta_0 = 0\], I reject my null hypothesis in favor of the alternative, \(\beta_0 = 0\)

Since for \(\beta_1\) \(p = 0.00949 < .0125 = \alpha\) and my null and alternative hypothesis are as follows \[H_0: \beta_1 \neq 0, H_a: \beta_1 = 0\], I accept my null hypothesis, \(\beta_1 \neq 0\)

Since for \(\beta_2\) \(p = 8.13 \times 10 ^{-6} < .0125 = \alpha\) and my null and alternative hypothesis are as follows \[H_0: \beta_2 \neq 0, H_a: \beta_2 = 0\], I accept my null hypothesis, \(\beta_2 \neq 0\)

Since for \(\beta_{1,2}\) \(p = 7.58 \times 10 ^{-9} < .0125 = \alpha\) and my null and alternative hypothesis are as follows \[H_0: \beta_{1,2} \neq 0, H_a: \beta_{1,2} = 0\], I accept my null hypothesis, \(\beta_{1,2} \neq 0\)

This is supported by their 95% confidence intervals \[\beta_0 \in [-1.72185026, 0.33293762], \beta_1 \in [0.56501098, 3.99811491], \beta_2 \in [0.02977906,0.07448294], \beta_{1,2} \in [-0.12220154, -0.06234453]\] as \(\beta_0\)’s contains 0 while the rest do not.

This model also has an overall p value of \(p = 2.2 \times 10^{-16}\), so there is a significant relationship described by the model Finally, this model has \(R^2 = 0.4836\), meaning that this model explains roughly half the variance in the dependent variable.

The model is as follows

\[\frac{\hat{Y_i}^{\lambda}-1}{\lambda} = \beta_1 X_{1_i} + \beta_2 X_{2_i} -\beta_{1,2}X_{1_i} X_{2_i} \] \[\frac{\hat{Y_i}^{0.1414141}-1}{0.1414141} = 2.28156 X_{1_i} + 0.05213 X_{2_i} -0.09227X_{1_i} X_{2_i} \] \[{\hat{Y_i}^{0.1414141}} = 1+ 0.3226448 X_{1_i} + 0.0073719 X_{2_i} −0.0130483X_{1_i} X_{2_i} \]

\[{\hat{Y_i}^{0.1414141}} = 1+ 0.3226448 X_{1_i} + 0.0073719 X_{2_i} −0.0130483X_{1_i} X_{2_i} \] \[\hat{Y_i} = (1+ 0.3226448 X_{1_i} + 0.0073719 X_{2_i} −0.0130483X_{1_i} X_{2_i})^{7.07143064235}\]

with \(\hat{Y}\) being case mortality rate (as %), \(X_1\) being \(HDI\) and \(X_2\) being persons fully vaccinated (out of 100).
Below is a dot plot of the data,

CovidCumulative <- NewCovid2 %>% arrange(desc(CaseMortalityRate))%>% 
mutate(COUNTRY= factor(COUNTRY)) %>%
mutate(text = paste("Country: ", COUNTRY, "\nPersons Fully Vaccinated (per 100 population): ", FullyVac100, "\nCumulative Cases (per 100000 pop): ", NormCase, "\nNew Deaths in last 7 Days (per 100000 pop): ", NormDeath, "\nCase Mortality Rate: ", CaseMortalityRate ,"\nHDI:", HDI,"\nPopulation:", `Pop`, sep="")) %>%
  

  
ggplot(aes(x=HDI, y = ((NewCovid2$CaseMortalityRate^lambda-1)/lambda), size = FullyVac100, color = WHO_REGION, text = text)) +
geom_point(alpha=0.5) +
scale_size(range = c(.1, 7), name="Persons Fully Vaccinated (per 100 population)")+
theme(legend.position="right") +
ylab("boxcox(Case Mortality Rate (as %))") +
xlab("HDI") +
scale_colour_viridis_d(option = "turbo")  


CovidCumulativeInteractive <- ggplotly(CovidCumulative, tooltip = "text")
CovidCumulativeInteractive

Overall, based off this model we can conclude that around half of the variance in the predictor variable is based on how many people are fully vaccinated (out of 100), and the HDI of the country being looked at. Hence, this confirms that Covid in the past year or so has hit underdeveloped nations, and nations without access to vaccinations the hardest. This is important because guaranteeing a better living standard, and sharing vaccines with impoverished nations can significantly decrease their case mortality rate, and save lives. Next, I will compare the case mortality rate of each World Health Organization Region, which may support this idea.

MODEL EXPLORATION: ANOVA, CASE MORTALITY RATE, AND PERSONS FULLY VACCINATED (OUT OF 100)

boxcoxCaseMortalityRate <- ((NewCovid2$CaseMortalityRate^lambda-1)/lambda)
WHO_REGION <- as.factor(NewCovid2$WHO_REGION)
data <- data.frame(boxcoxCaseMortalityRate,WHO_REGION)

new_model_AOV <- aov(boxcoxCaseMortalityRate~WHO_REGION)



p <- ggplot(data, aes(x=WHO_REGION, y=boxcoxCaseMortalityRate, fill=WHO_REGION)) + 
  geom_boxplot() + labs(title="Plot of length  per dose",x="Dose (mg)", y = "Length")
p

summary(new_model_AOV)
##              Df Sum Sq Mean Sq F value Pr(>F)    
## WHO_REGION    5  34.69   6.937   9.454  6e-08 ***
## Residuals   166 121.82   0.734                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
bptest(new_model_AOV)
## 
##  studentized Breusch-Pagan test
## 
## data:  new_model_AOV
## BP = 16.788, df = 5, p-value = 0.00492
shapiro.test(studres(new_model_AOV))
## 
##  Shapiro-Wilk normality test
## 
## data:  studres(new_model_AOV)
## W = 0.99443, p-value = 0.7646
plot(new_model_AOV,1)

plot(new_model_AOV,2)

plot(new_model_AOV,3)

plot(new_model_AOV,4)

plot(new_model_AOV,5)

plot(new_model_AOV,6)

hist(boxcoxCaseMortalityRate)

My ANOVA is a model of \[\frac{\hat{y_{ij}}^{\lambda}-1}{\lambda} = \mu_{j}+ e_{ij}\]

Where \(y_{ij}\) is the real valued response for the ith subject of the jth factor level, \(\mu_j\) is the population mean of the jth factor level, and \(e_{ij} \sim N(0,\sigma^2)\) An Anova model allows us to determine if there is a difference in the means of different factors, in this case we are interested in seeing if there is a difference in the mean case mortality rate by WHO Region.

I am using the same box-cox transformation I used in my first model, as it seems to normalize the predictor variable, as shown by the histogram above.

For this model, we need to check the assumptions of homoscedasticity (homogeneity of variance), normality, and independence.

For my ANOVA Model, my Breusch-Pagan test has a p value of \(p = 0.00492 < .05 = \alpha\) so it does not satisfy homoscedasticity (homogeneity of variance). However the p value of the shapiro test is \(p = 0.7646 > .05= \alpha\) so the model is normal (satisfies normality). Hence, I can use a Welches Anova which assumes Normality but not homoscedasticity. I will also assume independence of observations, as one regions case mortality rate should not affect anthers.

new_model_welch <- oneway.test(boxcoxCaseMortalityRate~WHO_REGION)
new_model_welch
## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  boxcoxCaseMortalityRate and WHO_REGION
## F = 8.6415, num df = 5.000, denom df = 45.427, p-value = 8.177e-06

Because the p-value of my Welch Anova is \(p = 8.177\times 10^{-6} < .05 = \alpha\), I can assume that there is a difference in means between groups. To figure out what means differ, I will use a Games-Howell Test.

games_howell_test(data, boxcoxCaseMortalityRate~WHO_REGION ,conf.level = 0.95, detailed = FALSE)
## # A tibble: 15 × 8
##    .y.            group1 group2 estimate conf.low conf.high   p.adj p.adj.signif
##  * <chr>          <chr>  <chr>     <dbl>    <dbl>     <dbl>   <dbl> <chr>       
##  1 boxcoxCaseMor… AFRO   AMRO     0.0128   -0.430     0.456 1   e+0 ns          
##  2 boxcoxCaseMor… AFRO   EMRO    -0.146    -0.972     0.680 9.93e-1 ns          
##  3 boxcoxCaseMor… AFRO   EURO    -0.900    -1.39     -0.411 1.02e-5 ****        
##  4 boxcoxCaseMor… AFRO   SEARO   -0.130    -1.28      1.03  9.98e-1 ns          
##  5 boxcoxCaseMor… AFRO   WPRO    -1.07     -1.88     -0.256 6   e-3 **          
##  6 boxcoxCaseMor… AMRO   EMRO    -0.159    -1.00      0.687 9.92e-1 ns          
##  7 boxcoxCaseMor… AMRO   EURO    -0.913    -1.44     -0.383 4.29e-5 ****        
##  8 boxcoxCaseMor… AMRO   SEARO   -0.142    -1.30      1.02  9.98e-1 ns          
##  9 boxcoxCaseMor… AMRO   WPRO    -1.08     -1.91     -0.248 6   e-3 **          
## 10 boxcoxCaseMor… EMRO   EURO    -0.754    -1.62      0.113 1.17e-1 ns          
## 11 boxcoxCaseMor… EMRO   SEARO    0.0166   -1.26      1.29  1   e+0 ns          
## 12 boxcoxCaseMor… EMRO   WPRO    -0.922    -1.97      0.127 1.13e-1 ns          
## 13 boxcoxCaseMor… EURO   SEARO    0.770    -0.397     1.94  2.92e-1 ns          
## 14 boxcoxCaseMor… EURO   WPRO    -0.168    -1.02      0.685 9.9 e-1 ns          
## 15 boxcoxCaseMor… SEARO  WPRO    -0.939    -2.21      0.331 2.24e-1 ns

Based upon my Games-Howell Test, there is a significant difference in the means of Europe and Africa, the Western Pacific and Africa, Europe and the Americas, and the West Pacific and the Americas. In particular, the 95% Confidence interval in the difference of the means of Europe and Africa is \([-1.3880957, -0.4114904]\), the 95% Confidence interval in the difference of the means of the Western Pacific and Africa is \([-1.8808796, -0.2556291]\), the 95% Confidence interval in the difference of the means of the Europe and the Americas is \([-1.4422163 -0.3829553]\) and, the 95% Confidence interval in the difference of the means of the West Pacific and the Americas is \([-1.9139549, -0.2481394]\).

This communicates that Africa has a significantly larger Mortality Rate than Europe, the Americas has a significantly larger mortality rate than Europe, Africa has a significantly larger Mortality Rate than the West Pacific and the Americas has a significantly larger mortality rate than the West Pacific. This is significant because, coupled with the linear model fitted from earlier, it seems very likely that more developed regions (such as Europe and the West Pacific), have markedly lower case mortality rates than regions that are less developed overall (such as the Americas as a whole, and Africa). With this knowledge, we should give/offer vaccinations to less developed parts of the world to prevent more deaths, and decrease their case mortality rates.

CONCLUSION

Overall, through exploring the data and using different models we were able to get a lot of insight into mechanisms of the covid pandemic, in regard to case mortality rate. Through fitting a linear regression model with BoxCox of Case Mortality Rate against HDI and Persons Fully Vaccinated (out of 100), we learned these two explanatory variables, and their interaction terms can explain around half the variance in the predictor variable. This communicated that how developed a country is, along with how many persons out of 100 are fully vaccinated, significantly changes the case mortality rate, as having a higher HDI and higher persons fully vaccinated out of 100 results in a lower case mortality rate.

The secondary model is an ANOVA model comparing each WHO’s region case mortality rate, as transformed by the same BoxCox in the linear regression model. This model communicated that both Europe and the West Pacific have a markedly lower case mortality rate than that both of the Americas and Africa. This ties into the first model as Europe and the West Pacific are generally higher developed regions, with more persons fully vaccinated overall than the Americas and Africa. With this knowledge, we should give excess vaccines to underdeveloped regions, such as Africa, do prevent deaths and eventually decrease their case mortality rate.

Some Caveats with this model is that the HDI is from 2020, and population that I used to normalize the Cumulative Deaths and Cases is from 2019, although these likely are not significant enough stressors to dampen my models accuracy, as the HDI of nations and their population likely were quite similar between 2020 and 2022 (they don’t change much). My cumulative cases and cumulative deaths also are from March 1st of 2021 to March 1st of 2022, as the former date I felt was a good predictor of when most countries started offering vaccinations for COVID-19, so this should only be limited to that time frame. This model also assumes integrity and honestly in reporting covid case and death numbers, along with the capability to do so in the first place. Further analysis of the relationship between the ability to report cases, and how developed a nation is would be good, as it would help us understand if underdeveloped nations have less of a capability to report cases and deaths. Finally, the persons fully vaccinated (out of 100) is the most up to date one for each nation, and most definitely changed over the course of a year, which decreased the case mortality rate as persons fully vaccinated out of 100 increased. However, a better, more thorough analysis would require better data, as in how persons fully vaccinated changed over time, as the model is likely very sensitive to it changing.

sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] readxl_1.3.1      rgl_0.108.3       rstatix_0.7.0     broom_0.7.12     
##  [5] lmtest_0.9-39     zoo_1.8-9         car_3.0-12        carData_3.0-5    
##  [9] reshape2_1.4.4    rvest_1.0.2       countrycode_1.3.1 viridis_0.6.2    
## [13] viridisLite_0.4.0 hrbrthemes_0.8.0  class_7.3-20      caret_6.0-90     
## [17] lattice_0.20-45   MASS_7.3-54       glmnet_4.1-3      Matrix_1.3-4     
## [21] ggpubr_0.4.0      forcats_0.5.1     stringr_1.4.0     dplyr_1.0.8      
## [25] purrr_0.3.4       readr_2.1.2       tidyr_1.2.0       tibble_3.1.6     
## [29] tidyverse_1.3.1   plotly_4.10.0     ggplot2_3.3.5    
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_2.0-2     selectr_0.4-2        ggsignif_0.6.3      
##  [4] ellipsis_0.3.2       fs_1.5.2             rstudioapi_0.13     
##  [7] farver_2.1.0         listenv_0.8.0        bit64_4.0.5         
## [10] prodlim_2019.11.13   fansi_1.0.2          lubridate_1.8.0     
## [13] xml2_1.3.3           codetools_0.2-18     splines_4.1.2       
## [16] extrafont_0.17       knitr_1.37           jsonlite_1.7.3      
## [19] pROC_1.18.0          Rttf2pt1_1.3.10      dbplyr_2.1.1        
## [22] compiler_4.1.2       httr_1.4.2           backports_1.4.1     
## [25] assertthat_0.2.1     fastmap_1.1.0        lazyeval_0.2.2      
## [28] cli_3.1.1            htmltools_0.5.2      tools_4.1.2         
## [31] gtable_0.3.0         glue_1.6.1           Rcpp_1.0.8          
## [34] cellranger_1.1.0     jquerylib_0.1.4      vctrs_0.3.8         
## [37] nlme_3.1-153         extrafontdb_1.0      crosstalk_1.2.0     
## [40] iterators_1.0.14     timeDate_3043.102    gower_1.0.0         
## [43] xfun_0.29            globals_0.14.0       lifecycle_1.0.1     
## [46] future_1.23.0        scales_1.1.1         ipred_0.9-12        
## [49] vroom_1.5.7          hms_1.1.1            parallel_4.1.2      
## [52] curl_4.3.2           yaml_2.2.2           gridExtra_2.3       
## [55] gdtools_0.2.4        rpart_4.1-15         stringi_1.7.6       
## [58] highr_0.9            foreach_1.5.2        lava_1.6.10         
## [61] shape_1.4.6          rlang_1.0.1          pkgconfig_2.0.3     
## [64] systemfonts_1.0.4    evaluate_0.14        labeling_0.4.2      
## [67] recipes_0.1.17       htmlwidgets_1.5.4    bit_4.0.4           
## [70] tidyselect_1.1.1     parallelly_1.30.0    plyr_1.8.6          
## [73] magrittr_2.0.2       R6_2.5.1             generics_0.1.2      
## [76] DBI_1.1.2            pillar_1.7.0         haven_2.4.3         
## [79] withr_2.4.3          survival_3.2-13      abind_1.4-5         
## [82] nnet_7.3-16          future.apply_1.8.1   modelr_0.1.8        
## [85] crayon_1.4.2         utf8_1.2.2           tzdb_0.2.0          
## [88] rmarkdown_2.11       grid_4.1.2           data.table_1.14.2   
## [91] ModelMetrics_1.2.2.2 reprex_2.0.1         digest_0.6.29       
## [94] stats4_4.1.2         munsell_0.5.0